Important

# R version 4.2.2 (2022-10-31) -- "Innocent and Trusting"
# Copyright (C) 2022 The R Foundation for Statistical Computing
# Platform: x86_64-apple-darwin17.0 (64-bit)

# Author: David Rodrigo Cajas

This R script expects the following files to be in the same folder as the script file to properly run: “required_packages.rds”, “Results_microresp_Calc.xlsx”, “experiment_metadata.xlsx” and “co2cal.xlsx”.

Now let’s get to it…

0) Data import

0.1) Working directory and packages

First we need to install the Rstudioapi package

# (Install and) load Rstudio api package
# (Install and) load the rstudioapi package.
# installed.packages() returns a matrix whose rownames are the package
# names; compare against those rather than the whole matrix (which also
# contains versions, paths and dependency strings and could match falsely).
if ("rstudioapi" %in% rownames(installed.packages())) {
  library(rstudioapi)
} else {
  install.packages("rstudioapi")
  library(rstudioapi)
}

Now we will set the working directory to where the R markdown folder is

# Determine the folder containing this script from the RStudio editor context.
# NOTE(review): this only works when the script is open in RStudio; when run
# non-interactively (e.g. via Rscript) getSourceEditorContext() is unavailable
# — confirm the script is always run interactively.
wd <- dirname(rstudioapi::getSourceEditorContext()$path)
# Development fallback: hard-coded path (kept for reference)
# wd <- "/Users/davidcajasmunoz/Library/CloudStorage/GoogleDrive-dadavid.cajas@gmail.com/Mi unidad/Academia/Postgrado/UvA/Results and experiments track/Experiment 1 - Inoculants and amendments on crops and grasses/9-Analysis/2-Microresp/Samples"

# Make all relative file paths below resolve next to the script
setwd(wd)

Import the list of packages in “required_packages.rds” and install them

# Load list of required packages
required_packages <- readRDS("required_packages.rds")
# Install the script's required packages that are missing.
# (fix: compare against rownames(installed.packages()) — the package names —
# instead of the whole matrix, whose other cells (versions, paths, Depends
# strings) could produce false "already installed" matches)
need_install <- required_packages[!required_packages %in% rownames(installed.packages())]
if (length(need_install) > 0) {
  install.packages(need_install)
}

[Later on] If you modified this code, don’t forget to update the list of script’s required packages

# required_packages <- names(sessionInfo()$otherPkgs)
# saveRDS(required_packages, "required_packages.rds")

0.2) Importing working data

This chunk imports all pages inside the “Results_microresp_Calc.xlsx” excel file as dataframes of 4 columns and 96 data rows, replacing the names of the columns with standardised ones

IMPORTANT: NOTE THAT THIS CODE ASSUMES THE COLUMNS ARE IN A PARTICULAR ORDER:

  • A = Well

  • B = non relevant [ignored]

  • C = Sample_ID

  • D = Absorbance before

  • E = Absorbance after

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(gtools)

# set the file containing the data
sourcefile <- "Results_microresp_Calc.xlsx"

# Import each sheet of the workbook as its own data frame (mr1, mr2, ...).
for (i in seq_along(excel_sheets(sourcefile))) {
  # Import data

  # Import columns A to E, rows 1 to 97, from sheet i, skipping column B;
  # the kept columns are read as text, text, numeric, numeric.
  # (fix: 1:length() replaced with seq_along(); old comment claimed chr/int types)
  df <- read_excel(sourcefile, sheet = i, range = "A1:E97", col_types = c("text", "skip", "text", "numeric", "numeric"))

  # Small adjustments

  colnames(df) <- c("well", "sample", "A570_0h", "A570_6h") # rename columns
  # Add separate columns for the plate coordinates encoded in "well"
  # (e.g. "B10" -> row "B", col 10); "col" keeps levels 1..12 so empty
  # plate columns are preserved in later tables/plots.
  df <- mutate(df,
               row = str_extract(well, "^[A-Z]"),
               col = factor(as.factor(str_extract(well, "\\d+")), levels = c(seq(1,12,1)))
  )

  # Wrap up

  assign(paste("mr",i, sep=""), df) # name output dataframe
  rm(df,i) # remove auxiliary "df" and "i" objects
}

# list all created dataframes so they can be easily called back
dfs <- mixedsort(ls(pattern = "^mr\\d+$"))

0.3) Import samples metadata

0.3.1) Import metadata from samples processed in Microresp

library(readxl)
library(stringr)
library(tidyr)

# Import sample labels from Microresp spreadsheet

# One metadata dataframe per sheet (mr1_meta, mr2_meta, ...).
for (i in seq_along(excel_sheets(sourcefile))) {
  # Import data

  # Import columns I and J, rows 1 to 97, from sheet i, with no header row;
  # both columns are read as text. (fix: old comment was copied from the
  # A:E absorbance import and described the wrong range/columns)
  df <- read_excel(sourcefile, sheet = i, range = "I1:J97", col_names = FALSE, col_types = c("text", "text"))

  # Delete NA rows
  df <- df[complete.cases(df), ]

  # Small adjustments

  colnames(df) <- c("sample", "condition") # rename columns

  # add separate columns for each of the conditions encoded in the "J" column, now named "condition"
  df <- separate(df,
                 col = condition,
                 into = c("plant", "soil", "treatment", "replicate"),
                 sep = "_")

  # Wrap up

  assign(paste0("mr", i, "_meta"), df) # name output dataframe
  rm(df,i) # remove auxiliary "df" and "i" objects
}
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
# list all created metadata dataframes (mr1_meta, mr2_meta, ...) in natural
# numeric order so they can be easily called back
mdfs <- mixedsort(ls(pattern = "^mr\\d+_meta$"))

0.3.2) Import experiment metadata

Note that this script imports the metadata from “experiment_metadata.xlsx” assuming that it’s separated by sheets called “treatment_data”, “soil_data” and “plant_data”. The order is not relevant.

# Define source of experiment metadata

sourcemeta <- "experiment_metadata.xlsx"

# Each sheet becomes a data frame named after the sheet itself
# (e.g. "treatment_data", "soil_data", "plant_data").
for (sheet_name in excel_sheets(sourcemeta)) {
  # Read the whole sheet as-is
  sheet_df <- read_excel(sourcemeta, sheet = sheet_name)

  # Store it under the sheet's own name, then tidy up the helpers
  assign(sheet_name, sheet_df)
  rm(sheet_df, sheet_name)
}

# Add order to treatments
# Define the display order of treatment labels/products as factor levels.
# (fix: factor(as.factor(x), levels = ...) double-converted; factor() on the
# character column directly is equivalent and clearer)
treatment_data$label <- factor(treatment_data$label, levels = c("Control", "Disease suppression", "AMF", "Nitrogen fixation", "Phosphate solubilisation"))
treatment_data$applied_product <- factor(treatment_data$applied_product, levels = c("No product", "Compete Plus", "MycorGran 2.0", "Vixeran", "NuelloPhos"))

# Small processing of soil data

# Columns containing at least one digit are treated as numeric-like.
# (fix: vapply() with a declared logical(1) result replaces sapply(), whose
# return type depends on its input)
numeric_cols <- names(soil_data)[vapply(soil_data, function(x) any(grepl("[0-9]", x)), logical(1))]
soil_data[numeric_cols] <- lapply(soil_data[numeric_cols], function(x) {
  x <- ifelse(grepl("^<", x), 0, x) # values beginning with "<" are below the detection limit; replace them with 0
  x <- as.numeric(x)  # Convert to numeric AFTER cleaning
  x
}) # This function replaces values that start with "<" with 0
rm(numeric_cols) # Remove auxiliary object

## On develop: Check soil texture and include in metadata if not there already.

# library(soiltexture)
# library(ggplot2)
# 
# TT.text(data.frame(
#   soil = soil_data$soil,
#   SAND = soil_data$sand_perc,
#   SILT = soil_data$silt_perc,
#   CLAY = soil_data$clay_perc
# ), geo = "USDA"
# )

0.3.3) Extract relevant experiment metadata and add to Microresp metadata dataframes

# Define relevant metadata to extract. This can be modified for further customisation.
# Column names are picked by position from each metadata table; plant_data
# currently contributes none (empty index vector).
pick_metadata <- c(names(soil_data)[c(3, 6, 10:14)],   # chosen metadata columns in soil_data
                   names(treatment_data)[c(3, 5:14)],  # chosen metadata columns in treatment_data
                   names(plant_data)[integer(0)])      # chosen metadata columns in plant_data

# Create a function to replace values in a common column. It will be used to replace labels.
# Replaces the values of `col_name` in df1 with the values of `new_col_name`
# taken from the lookup table df2, assuming `col_name` exists in both
# dataframes. The helper column is dropped before returning.
replace_values <- function(df1, df2, col_name, new_col_name) {
  joined <- left_join(df1, df2, by = col_name)                 # attach lookup column
  relabelled <- mutate(joined, !!col_name := !!sym(new_col_name)) # overwrite codes with labels
  select(relabelled, -!!sym(new_col_name))                     # drop the helper column
}

# Add selected metadata and change labels to metadata dataframe

# For each plate's metadata dataframe: join in the chosen plant/soil/treatment
# metadata columns (any_of() tolerates names missing from pick_metadata), then
# swap the short plant/soil/treatment codes for their full labels using the
# first two columns of each lookup table. Join order determines the resulting
# column order, so the pipeline order is intentional.
for (i in mdfs) {
  df <- get(i) # get the object
  
  # Apply formulas
  df <- df %>%
    
    # Add defined metadata
    
    left_join(select(plant_data, "plant",any_of(pick_metadata)), by = "plant") %>%
    left_join(select(soil_data, "soil",any_of(pick_metadata)), by = "soil") %>%
    left_join(select(treatment_data, "treatment",any_of(pick_metadata)), by = "treatment") %>%
    
    # Replace compressed labels for full size variables
    # (assumes columns 1:2 of each lookup table are the code and its "label" —
    # see replace_values())
    
    replace_values(plant_data[,1:2], "plant", "label") %>%
    replace_values(soil_data[,1:2], "soil", "label") %>%
    replace_values(treatment_data[,1:2], "treatment", "label")
  
  # Wrap up
  
  assign(i,df) # write back on the original dataframe
  rm(df,i) # remove auxiliary objects
  
}

0.3.4) Paste Microresp metadata dataframes from 0.3.3) into Microresp dataframes imported in 0.2)

# Attach each plate's metadata (mrX_meta) to its measurement table (mrX),
# matching rows on the "sample" column.
for (plate_name in dfs) {
  measurements <- get(plate_name)
  metadata <- get(paste0(plate_name, "_meta"))

  # Add metadata: keep every measurement row, even without a metadata match
  measurements <- left_join(measurements, metadata, by = "sample")

  # Write back on the original dataframe and tidy up
  assign(plate_name, measurements)
  rm(measurements, metadata, plate_name)
}

# Delete auxiliary objects if desired

# rm(pick_metadata,mdfs)

0.4) Import %CO2 calibration values

This is just stored separately in another file called “co2cal.xlsx”

# Calibration curve points: sample ID -> known %CO2 (v/v)
calvalues <- read_excel("co2cal.xlsx", col_names = c("sample","CO2_per"), col_types =c("text","numeric"))

1) Data processing: Operations on created dataframes

1.1) Calculate normalised absorbance values and add calibration %CO2 values from “calvalues” object

The following formula is used to calculate normalised absorbance:

\[\ A_i = \frac{A_{t6}}{A_{t0}}*µ_{{A_{t0}}}\]

# Normalise absorbance per plate and attach the known calibration %CO2 values.
for (i in dfs) {
  df <- get(i) # get the object
  
  # Apply formulas
  
  # Normalisation: A_i = (A_6h / A_0h) * mean(A_0h)
  # NOTE(review): mean() is called without na.rm = TRUE, so a single NA in
  # A570_0h makes every adjusted value NA — confirm the inputs are complete.
  df$AAdjusted <- (df$A570_6h / df$A570_0h)*mean(df$A570_0h) # Normalisation formula
  df$AAdjusted[is.infinite(df$AAdjusted) | is.nan(df$AAdjusted)] <- NA  # Replace infinite and NaN values (e.g. from division by zero) with NA
  
  # Add calibration values
  
  # Left join: every well is kept; only calibration wells get a CO2_per value
  df <- merge(df, calvalues, by = "sample", all.x = T, sort = F)
  
  # Error check and report
  
  # Calibration wells are C1..C20 plus the H2O blank; report any that lack a
  # known %CO2 value after the merge
  missing_cal <- df$sample %in% c(paste0("C", 1:20),"H2O") & is.na(df$CO2_per)
  if (any(missing_cal)) {
    print(paste("[ Calibration error in:",i,"]","Missing calibration values for:", paste(df$sample[missing_cal], collapse = ", ")))
  }
  
  # Wrap up
  
  assign(i,df) # write back on the original dataframe
  rm(df,i, missing_cal) # remove auxiliary objects
}

1.2) Fitting a model

For easier management, each Microresp dataset will be split into a calibration subset (to fit the model) and a sample subset (to analyse later).

## Split dataframes into "calibration" and "sample" subsets

for (plate in dfs) {
  plate_df <- get(plate)

  # Logical masks: sample wells are labelled "S<number>"; calibration wells
  # are C1..C20 plus the H2O blank (remove "H2O" from the vector below to
  # exclude it from the calibration set).
  sample_mask <- grepl("^S[0-9]+$", plate_df$sample)
  cal_mask <- plate_df$sample %in% c(paste0("C", 1:20)
                                     ,"H2O" # Include/Exclude H2O from calibration set
                                     )

  # Subset and store under "<plate>_c" / "<plate>_s"
  cal_subset <- plate_df[cal_mask, ]
  sample_subset <- plate_df[sample_mask, ]

  assign(paste0(plate, "_c"), cal_subset)
  assign(paste0(plate, "_s"), sample_subset)

  # Tidy up loop-local objects
  rm(plate_df, plate, sample_mask, cal_mask, cal_subset, sample_subset)
}

1.2.1) Create auxiliary objects

# Natural-sorted name lists of the calibration ("_c") and sample ("_s")
# subsets created above, so they can be iterated over later
dfs_c <- mixedsort(ls(pattern = "^mr\\d+_c$"))
dfs_s <- mixedsort(ls(pattern = "^mr\\d+_s$"))

1.2.2) Fit model

This approach seems a bit complex because it contains 3 nested loops. Nonetheless, essentially it fits the data on each calibration dataset to the calibration values imported assuming the following formula:

\[ \%CO2 = A+\frac{B}{1+D*A_i} \]

The reasons for the loops are as follows:

  1. The first loop (for{}) runs iteratively over all calibration datasets, so it gets a model for each one.
  2. The second loop (for{}) runs all different optimisation algorithms for the fitting function in an effort to ensure that a solution is found following the procedure in the third loop. After the third loop is finished, it will check if a “model” was stored. If yes, it will save it into a new object. If not, it will create a warning message.
  3. The third loop (tryCatch{}) gives a more robust approach to errors. tryCatch will “try” the code with the first algorithm defined in the previous loop. If it succeeds, it will jump directly to report the found model. If not, it will create a warning message.
# Dataframe listing each expected model name, the excel sheet its data fit
# to, and whether the model was successfully obtained (initialised to FALSE;
# flagged TRUE after a successful fit).
# (fix: data_frame() is deprecated since tibble 1.1.0 — use tibble() and
# name the columns directly)
expected_models <- tibble(
  model = paste0("mod_", dfs_c),
  datasheet = excel_sheets(sourcefile),
  obtained = rep(FALSE, length(dfs_c))
)

for (i in dfs_c) { 
  # this approach has 3 nested loops. This is the [1]: iterate over all
  # calibration datasets, producing one model per dataset.
  
  df <- get(i) # get the object
  
  # Fit model: Nested for() [2] loop was implemented so different optimisation algorithms can be tried
  
  algorithms <- c("default", "port", "plinear") # List of algorithms to try
  
  for (algorithm in algorithms) { 
    
    # The modeling function is further nested [3] in the tryCatch function
    
    tryCatch({ # tryCatch prevents the loop from stopping if the modeling is unsuccessful
    
    # Model itself: %CO2 ~ a + b / (1 + d * A_i). It gets stored in an
    # auxiliary "m" object; start values for a, b and d come from the manual.
    m <- nls(CO2_per ~ a + b / (1 + d * AAdjusted), 
             data = df, 
             start = list(a = -2, b = -10, d = -6.8),
             control = nls.control(minFactor = 1e-10, maxiter = 1e7),
             algorithm = algorithm) # algorithm from the list created above
    
    break # Fit succeeded: break out of the [2] algorithm loop.
    
  } # If nls() halted (error/warning), the handlers below run instead.
    
    , error = function(e) {
      cat("[ Error fitting model for", i,"using",algorithm, "algorithm ]", ":\n", conditionMessage(e), "\n")
    }
    , warning = function(w) {
      cat("[ Warning fitting model for", i,"]", ":\n", conditionMessage(w), "\n")
    })
  }
  
  # Wrap up
  
  if (exists("m")) { # If a model exists, print a success message and store the model
    print(paste("Model for", i, "successful using",algorithm,"algorithm:"))
    print(summary(m))
    
    assign(paste0("mod_", i), m) # Save the model with a name based on the calibration dataframe name
    expected_models$obtained[str_which(dfs_c, i)] <- TRUE # Flag the model as obtained
    rm(m) # Remove "m" only when it was actually created
    # (fix: the old unconditional rm(..., m, ...) warned "object 'm' not
    # found" whenever all algorithms failed for a dataset)
  } else { # If a model does not exist, print an error message.
    cat("[ Error: Failed to fit model for", i, "using all algorithms. Skipping. ]\n")
  }
  
  rm(df,i, algorithm, algorithms) # remove auxiliary objects
}
## [1] "Model for mr1_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -0.6769     0.1595  -4.243 0.000136 ***
## b  -2.1912     0.5518  -3.971 0.000308 ***
## d  -3.6818     0.2508 -14.678  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.392 on 38 degrees of freedom
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 2.499e-06
## 
## [ Error fitting model for mr2_c using default algorithm ] :
##  singular gradient 
## [1] "Model for mr2_c successful using port algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -2.2571     2.4432  -0.924    0.372    
## b  -1.0164     1.4207  -0.715    0.487    
## d  -1.1571     0.1499  -7.721 3.29e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6946 on 13 degrees of freedom
## 
## Algorithm "port", convergence message: relative convergence (4)
## 
## [1] "Model for mr3_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.61938    0.08704  -7.116 7.86e-06 ***
## b -2.23224    0.32930  -6.779 1.30e-05 ***
## d -4.05531    0.15860 -25.570 1.68e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1324 on 13 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 7.167e-08
## 
## [1] "Model for mr4_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.19868    0.06878  -2.888   0.0127 *  
## b -0.60458    0.08670  -6.973 9.72e-06 ***
## d -4.33246    0.06252 -69.296  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1757 on 13 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 1.744e-07
## 
## [1] "Model for mr5_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.62129    0.06614  -9.394 3.70e-07 ***
## b -2.03125    0.22314  -9.103 5.29e-07 ***
## d -3.50641    0.09532 -36.785 1.58e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1 on 13 degrees of freedom
## 
## Number of iterations to convergence: 7 
## Achieved convergence tolerance: 2.403e-06
## 
## [1] "Model for mr6_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.26260    0.06691  -3.925  0.00174 ** 
## b -0.84022    0.10972  -7.658  3.6e-06 ***
## d -4.35786    0.07489 -58.192  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1626 on 13 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 7.77e-06
## 
## [1] "Model for mr7_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -1.2984     0.2534  -5.123 0.000196 ***
## b  -7.6044     3.0289  -2.511 0.026062 *  
## d  -6.0472     1.2108  -4.994 0.000245 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2247 on 13 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 3.911e-06
## 
## [1] "Model for mr8_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -1.3801     0.2098  -6.579 1.77e-05 ***
## b  -5.4077     1.4840  -3.644  0.00297 ** 
## d  -3.9462     0.4478  -8.812 7.64e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1515 on 13 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 1.306e-06
## 
## [1] "Model for mr9_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -0.9041     0.2068  -4.372 0.000756 ***
## b  -4.0341     1.3017  -3.099 0.008462 ** 
## d  -5.2759     0.6334  -8.329 1.43e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.249 on 13 degrees of freedom
## 
## Number of iterations to convergence: 7 
## Achieved convergence tolerance: 1.971e-06
## 
## [1] "Model for mr10_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -1.0449     0.2030  -5.148 6.74e-05 ***
## b  -2.6504     0.7011  -3.781  0.00137 ** 
## d  -3.4769     0.2514 -13.832 4.96e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.225 on 18 degrees of freedom
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 3.401e-06
## 
## [1] "Model for mr11_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.63193    0.07548  -8.372 1.27e-07 ***
## b -1.98046    0.26051  -7.602 5.03e-07 ***
## d -4.13415    0.13230 -31.249  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1267 on 18 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 1.208e-06
## 
## [1] "Model for mr12_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -2.1963     0.2567  -8.557 9.27e-08 ***
## b -34.7090    23.7459  -1.462    0.161    
## d -14.2646     7.7155  -1.849    0.081 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1221 on 18 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 3.636e-07
## 
## [ Error fitting model for mr13_c using default algorithm ] :
##  singular gradient 
## [ Error fitting model for mr13_c using port algorithm ] :
##  Convergence failure: singular convergence (7) 
## [ Error fitting model for mr13_c using plinear algorithm ] :
##  singular matrix 'a' in solve 
## [ Error: Failed to fit model for mr13_c using all algorithms. Skipping. ]
## Warning in rm(df, i, m, algorithm, algorithms): object 'm' not found
## [1] "Model for mr14_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.49432    0.08442  -5.856 1.52e-05 ***
## b -1.62958    0.26731  -6.096 9.26e-06 ***
## d -3.34950    0.11887 -28.177 2.42e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1616 on 18 degrees of freedom
## 
## Number of iterations to convergence: 7 
## Achieved convergence tolerance: 4.621e-07
## 
## [1] "Model for mr15_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  -0.9096     0.1408  -6.462 4.43e-06 ***
## b  -3.9693     0.9425  -4.212 0.000524 ***
## d  -4.2857     0.3787 -11.316 1.29e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1618 on 18 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 3.87e-06
## 
## [1] "Model for mr16_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.78746    0.08451  -9.318 2.62e-08 ***
## b -3.25024    0.47983  -6.774 2.40e-06 ***
## d -4.13666    0.20487 -20.191 8.17e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1156 on 18 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 7.769e-07
## 
## [1] "Model for mr17_c successful using default algorithm:"
## 
## Formula: CO2_per ~ a + b/(1 + d * AAdjusted)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a -0.49432    0.08442  -5.856 1.52e-05 ***
## b -1.62958    0.26731  -6.096 9.26e-06 ***
## d -3.34950    0.11887 -28.177 2.42e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1616 on 18 degrees of freedom
## 
## Number of iterations to convergence: 7 
## Achieved convergence tolerance: 4.621e-07

It is possible to export a model for later use (ie: a calibration plate)

# Persist the reference (calibration-plate) model for reuse in later sessions
saveRDS(mod_mr1_c,"microresp_ref_model.rds")

1.3) Estimate %CO2 values for samples

1.3.0) Import previous models if wanted

If desired, a specific model can be imported here:

# mod_import <- readRDS("model3.rds")

1.3.1) Calculate based on a reference model

The constant values (A, B and D) from a given active model are used to calculate %CO2 on the samples based on the same formula stated before.

# Define the active model (the reference calibration model used for all plates)
active_model <- mod_mr1_c

# Calculate %CO2 based on active model

for (i in dfs_s) {
  df <- get(i) # get the object
  
  # Apply formula and add data
  
  # Invert the calibration curve: %CO2 = a + b / (1 + d * A_i)
  df$CO2_per <- coef(active_model)["a"] + coef(active_model)["b"] / (1 + coef(active_model)["d"] * df$AAdjusted) 
  
  # Add a stamp to recognise which model was used
  
  # NOTE(review): as.character(quote(active_model)) always yields the literal
  # string "active_model", not the name of the model it points to (e.g.
  # "mod_mr1_c") — confirm that a generic stamp is intended here (step 1.3.2
  # later overwrites it with the plate name for internally-modelled plates).
  df$microresp_model <- rep(as.character(quote(active_model)), nrow(df))
  
  # Wrap up
  
  assign(i,df) # write back on the original dataframe
  rm(df,i) # remove auxiliary objects
}

1.3.2) Calculate based on internal models from each plate

# For which plate numbers do you want to use the internal model?
plates_in_model <- c(3,5)

# Define some auxiliary variables
# Row positions of the selected plates; positions match between the
# expected_models table and the dfs_s list.
id_in_models <- which(expected_models$datasheet %in% paste0("Plate ", plates_in_model))

# Perform recalculation only for selected plates

for (i in id_in_models) {
  # Skip plates whose internal model failed to fit — otherwise get() on a
  # non-existent model object would abort the loop (fix: e.g. mr13 above
  # never obtained a model).
  if (!expected_models$obtained[i]) {
    warning("No fitted internal model for ", expected_models$datasheet[i],
            "; keeping reference-model estimates.", call. = FALSE)
    next
  }
  
  # Get the objects
  df <- get(dfs_s[i])
  in_model <- get(expected_models$model[i])
  
  # Apply formula: %CO2 = a + b / (1 + d * A_i) with this plate's own coefficients
  
  df$CO2_per <- coef(in_model)["a"] + coef(in_model)["b"] / (1 + coef(in_model)["d"] * df$AAdjusted)

  # Add a stamp to recognise which model was used
  
  df$microresp_model <- rep(as.character(expected_models$datasheet[i]), nrow(df))
  
  # Wrap up
  
  assign(dfs_s[i],df) # write back on the original dataframe
  rm(df,i, in_model) # remove auxiliary objects
}
rm(plates_in_model, id_in_models)

1.4) Estimate microbial respiration rates

The following formula is used to calculate microbial respiration rate based on the %CO2 detected on each well:

\[ Respiration_{µgCO_2-Cg^{-1}_{dry~soil}h^{-1}} = \frac{ \frac{\%CO_2}{100} \times V_{µL} \times \frac{44}{22.4} \times \frac{12}{44} \times \frac{273}{273+T_{°C}}} { SoilDwt_{g} \times t_{h} } \]

Considering the following values:

  • \(\%CO_2\) is the estimated value calculated before (in % (v/v) ).

  • \(V_{µL}\) is the estimated headspace volume in the (calibration) system (in µL). In this case:

    • \(V_{µL} = V_{Deep~well} + V_{Agar~well} - V_{Agar} - V_{Calibration~solution} = 1200 µL + 400 µL - 150 µL - 250 µL = 1200 µL_{Headspace}\)
  • \(T_{°C}\) is the incubation temperature (in °C). In this case: 26 °C

  • \(SoilDwt_{g}\) is the soil dry weight (in g) added to each well. In this case: 0.5 g

  • \(t_h\) is the incubation time of the system (in h). In this case: 6 h.

Also Note that there are some constant values included:

  • \(µCO_2 = 44 g/mol\)

  • \(µC = 12 g/mol\)

  • \(V_{ideal~gas} = 22.4 L/mol\)

# Convert estimated %CO2 into a microbial respiration rate
# (µg CO2-C per g dry soil per h) for every sample dataframe.
for (i in dfs_s) {
  df <- get(i) # get the object

  # Microresp system constants (declared per iteration, removed at the end)
  incubation_temp <- 26 # Temperature, in °C
  headspace_vol <- 1200 # Headspace volume in µL
  soil_dry_wt <- 0.5    # Dry soil weight per well in g
  incubation_time <- 6  # Incubation time in h

  # Respiration = (%CO2/100 * V * 44/22.4 * 12/44 * 273/(273+T)) / (w * t)
  df$Respiration_rate <-  ( (df$CO2_per/100)*headspace_vol*(44/22.4)*(12/44)*(273/(273+incubation_temp)) ) / (soil_dry_wt*incubation_time)

  # Write back on the original dataframe and tidy up
  assign(i, df)
  rm(df, i, incubation_temp, headspace_vol, soil_dry_wt, incubation_time)
}

1.5) Merge data and export.

All samples dataframes can be now merged into one, preserving a label for their origin (the plate)

# Collect all per-plate sample dataframes in a temporary list, then merge
# them once after the loop. (fix: the original called do.call(rbind, ...)
# INSIDE the loop, re-merging the whole list on every iteration — accidental
# O(n^2) work; the final result is the same.)
samples_list <- list()

for (i in dfs_s) {
  samples_list[[i]] <- get(i) # Add the data frame to the list
  rm(i) # remove auxiliary "i" object
}

# Merge all data frames in the list into a single data frame
merged_samples <- do.call(rbind, samples_list)

merged_samples$plate <- rep(excel_sheets(sourcefile), times = sapply(samples_list, nrow)) # For this line to work, all sheets in the input excel file have to have at least 1 sample and a corresponding "mrx_s" dataframe

rm(samples_list) # remove the list object
head(merged_samples)
##          sample well A570_0h A570_6h row col     plant              soil
## mr1_s.50     S4   A9   1.539   1.046   A   9 Faba bean   Sandy high Phos
## mr1_s.51     S3  A10   1.504   1.100   A  10 Faba bean    Sandy low Phos
## mr1_s.52     S2  A11   1.501   1.105   A  11 Faba bean Sandy not managed
## mr1_s.53     S1  A12   1.448   1.116   A  12 Faba bean Sandy not managed
## mr1_s.62     S4   B9   1.534   0.653   B   9 Faba bean   Sandy high Phos
## mr1_s.63     S3  B10   1.516   1.078   B  10 Faba bean    Sandy low Phos
##                         treatment replicate origin_location    soil_texture
## mr1_s.50      Disease suppression         1        Hoge eng            Sand
## mr1_s.51                  Control         1           Joppe Sandy clay loam
## mr1_s.52      Disease suppression         1     Droevendaal      Loamy sand
## mr1_s.53 Phosphate solubilisation         1     Droevendaal      Loamy sand
## mr1_s.62      Disease suppression         1        Hoge eng            Sand
## mr1_s.63                  Control         1           Joppe Sandy clay loam
##          CaCO3_perc NOM_perc  pH P-CaCl2_mgP/kg P-AL_mgP/kg applied_product
## mr1_s.50        0.0      1.1 5.5            5.4       402.0    Compete Plus
## mr1_s.51        0.5      7.9 5.4            0.3        13.1      No product
## mr1_s.52        0.4      4.7 4.6            0.7       100.0    Compete Plus
## mr1_s.53        0.4      4.7 4.6            0.7       100.0      NuelloPhos
## mr1_s.62        0.0      1.1 5.5            5.4       402.0    Compete Plus
## mr1_s.63        0.5      7.9 5.4            0.3        13.1      No product
##              target_function         active_principle_1 active_principle_2
## mr1_s.50 Disease suppression Bacillus amyloliquefaciens   Bacillus pumilus
## mr1_s.51                <NA>                       <NA>               <NA>
## mr1_s.52 Disease suppression Bacillus amyloliquefaciens   Bacillus pumilus
## mr1_s.53     Nutrient supply    Pseudomonas fluorescens               <NA>
## mr1_s.62 Disease suppression Bacillus amyloliquefaciens   Bacillus pumilus
## mr1_s.63                <NA>                       <NA>               <NA>
##          active_principle_3     active_principle_4      active_principle_5
## mr1_s.50  Bacillus subtilis Bacillus licheniformis Azotobacter chroococcum
## mr1_s.51               <NA>                   <NA>                    <NA>
## mr1_s.52  Bacillus subtilis Bacillus licheniformis Azotobacter chroococcum
## mr1_s.53               <NA>                   <NA>                    <NA>
## mr1_s.62  Bacillus subtilis Bacillus licheniformis Azotobacter chroococcum
## mr1_s.63               <NA>                   <NA>                    <NA>
##              active_principle_6    active_principle_7 active_principle_8
## mr1_s.50 Trichoderma atroviride Trichoderma harzianum               <NA>
## mr1_s.51                   <NA>                  <NA>               <NA>
## mr1_s.52 Trichoderma atroviride Trichoderma harzianum               <NA>
## mr1_s.53                   <NA>                  <NA>               <NA>
## mr1_s.62 Trichoderma atroviride Trichoderma harzianum               <NA>
## mr1_s.63                   <NA>                  <NA>               <NA>
##          active_principle_9 AAdjusted    CO2_per microresp_model
## mr1_s.50               <NA> 0.9535518 0.19579892    active_model
## mr1_s.51               <NA> 1.0261151 0.11186519    active_model
## mr1_s.52               <NA> 1.0328394 0.10489730    active_model
## mr1_s.53               <NA> 1.0813016 0.05810255    active_model
## mr1_s.62               <NA> 0.5972265 1.15083614    active_model
## mr1_s.63               <NA> 0.9976329 0.14281010    active_model
##          Respiration_rate  plate
## mr1_s.50        0.3830848 Test_3
## mr1_s.51        0.2188667 Test_3
## mr1_s.52        0.2052339 Test_3
## mr1_s.53        0.1136789 Test_3
## mr1_s.62        2.2516359 Test_3
## mr1_s.63        0.2794111 Test_3

Export the fully processed, unfiltered dataset:

# Export the fully processed, unfiltered dataset in two formats.
write.csv(merged_samples, "microresp_processed.csv", row.names = FALSE) # CSV: simplest, but loses some structure (e.g. factor columns).
saveRDS(merged_samples, "microresp_processed.rds") # RDS: exact copy of the R object, but only readable by R.

The exported dataset contains:

  • Measurements made for all samples (calibration points are excluded) in all plates included in the imported xlsx file.

  • Metadata for all reads (provided sample IDs in results xlsx file match variables from metadata xlsx file).

  • Estimation of \(\%CO_2\) and microbial respiration rate for each measurement based on a specific model selected in step 1.3.1).

2) Plotting data and model

2.1) Plot Model(s)

# Plotting / reshaping packages for section 2.
library(ggplot2)
library(ggpubr)
library(tidyr)
library(ggprism)
library(plotly) # NOTE: masks ggplot2::last_plot, dplyr/stats filter() and graphics::layout() (see attach messages below)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Build a named colour palette: one interpolated colour per calibration
# sheet, plus fixed colours for soil samples and the active model curve.

## Seed colours (Spectral-like) used to interpolate the full palette
cal_palette <- c("#9E0142", "#D53E4F", "#F46D43", "#FDAE61", "#FEE08B",
                 "#E6F598", "#ABDDA4", "#66C2A5", "#3288BD", "#5E4FA2")

## Interpolating colour function derived from the seed palette
col_fn <- colorRampPalette(cal_palette)

## One colour per data sheet (the first sheet is skipped), named after the
## sheet it belongs to
sheet_names <- excel_sheets(sourcefile)[-1]
cal_palette <- setNames(col_fn(length(sheet_names)), sheet_names)

## Fixed colours for the sample points and the active calibration model
cal_palette["soil"] <- "#FF0000"         # Red
cal_palette["active model"] <- "#0000FF" # Blue

# All curves plot.
# Builds one figure with the active calibration model plus the calibration
# points (and fitted curves, where a model exists) of every other sheet,
# rendered interactively at the end so individual curves can be toggled.

## First, create the main plot, with the active model
p <- ggplot(mr1, aes(y = CO2_per, x = AAdjusted)) +
  geom_point(aes(color = "active model"), data = get(dfs_c[1])) +
  geom_line(aes(y = fitted(get(expected_models$model[1])), color = "active model"), data = get(dfs_c[1])) +
  geom_hline(yintercept = 0, color = "red") + 
  scale_color_manual(values = cal_palette) +
  theme_prism() + 
  labs(x = "Normalised A570", y = "Theoretical CO2 concentration (%)", title = "Calibration curves")

## Then, iteratively add layers for the other models
## NOTE(review): 2:length(dfs_c) iterates c(2, 1) when length(dfs_c) < 2;
## guard with seq-based indexing if fewer than two calibration dataframes
## are ever possible. aes_string() is deprecated (warning below); a
## tidy-evaluation rewrite would need !!/.data idioms — left unchanged here.
for (i in 2:length(dfs_c)) {

# Add calibration data points from each sheet
  p <- p + geom_point(aes_string(color = shQuote(excel_sheets(sourcefile)[i])), data = get(dfs_c[i]))

# Add line for only EXISTING models
  if(expected_models$obtained[i]) {
   p <- p + geom_line(aes_string(y = fitted(get(expected_models$model[i])), color = shQuote(excel_sheets(sourcefile)[i])), data = get(dfs_c[i])) 
  }

# Note: aes() had to be replaced with aes_string() so the color function gets evaluated during each iteration instead of at the end of the loop. shQuote() was added to ensure the names with spaces are handled properly.
rm(i)
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Change the name to something meaningful
plot_models <- p 
rm(p)

## Plot as interactive, so you can choose which models to see
ggplotly(plot_models)

Based on this plot, it is advisable to:

  • Filter out Plates 1 and 12.

  • Use the internal calibration model for samples in Plates 3 and 5.

2.2) Plot active model with samples

# define some auxiliary variables

## Extract the name used for the X variable in the model (the absorbance)
## NOTE(review): this walks a fixed path through the fitted model's call
## tree — extremely brittle; it breaks silently if the model formula ever
## changes shape. Consider all.vars(formula(active_model)) instead.
active_model_x <- active_model[["call"]][["formula"]][[3]][[3]][[3]][[2]][[3]][[3]] 

## Extract the name used for the Y variable in the model (the %CO2)
active_model_y <- active_model[["call"]][["formula"]][[2]] 

# Plot for curve from active model and all samples

plot_samples_in_model <- ggplot(mr1, aes(y = CO2_per, x = AAdjusted)) +
  # Add calibration points layer
  geom_point(aes(color = "active model"), data = mr1_c) + # gets the dataframe that was used to train the model
  # Add calibration model line layer
  geom_line(aes(y = fitted(get(expected_models$model[1])), 
    color = "active model"), data = mr1_c) + # gets the dataframe that was used to train the model
  # Add sample points
  geom_point(aes(color = "soil", 
                 shape = plate
                 ), data = merged_samples) +
  # Add a line at %CO2 = 0
  # NOTE(review): aes(alpha = 0.5) here is ignored because yintercept is
  # given directly (see warning below); pass alpha = 0.5 as a plain layer
  # argument if the transparency is actually wanted.
  geom_hline(yintercept = 0, color = "red", aes(alpha = 0.5)) + 
  scale_color_manual(values = cal_palette) +
  theme_prism() + 
  labs(x = "Normalised A570", y = "Theoretical CO2 concentration (%)", title = "calibration curve")
## Warning: `geom_hline()`: Ignoring `mapping` because `yintercept` was provided.
ggplotly(plot_samples_in_model)
## Warning: The shape palette can deal with a maximum of 6 discrete values because more
## than 6 becomes difficult to discriminate
## ℹ you have requested 17 values. Consider specifying shapes manually if you need
##   that many have them.

2.3) All samples

# Plot %CO2 for every sample (calibration plate excluded), with reading
# labels so outliers can be traced back to their dataframe rows.
# The non-calibration subset is computed ONCE and reused, so the point
# layer and its row-name labels cannot drift out of sync (the original
# called filter(merged_samples, ...) twice with the same condition).
samples_no_cal <- filter(merged_samples, plate != "Test_3") # Don't consider calibration plate
plot_all_samples <- ggplot(samples_no_cal,
                           aes(x = sample, y = CO2_per, color = plate)) +
  geom_point() +
  geom_text(aes(label = rownames(samples_no_cal)),
            vjust = -0.5, hjust = 0.5) + 
  geom_boxplot() +
  geom_hline(yintercept = 0, color = "red") + 
  theme_prism() + 
  labs(x = "Sample", y = "CO2 concentration (%)") 
ggplotly(plot_all_samples)

Based on this plot, it might be advisable to check and filter out specific readings for some samples (e.g. S34, reading 19 in dataframe mr4_s).

2.4) Sanity check

2.4.1) <0 \(\%CO_2\) values

## Check how many readings fall below 0 %CO2 (physically impossible values
## that flag calibration or reading problems) and identify their samples.

readings_below_0 <- filter(merged_samples, CO2_per < 0)
number_wells_below_0 <- nrow(readings_below_0)
number_samples_below_0 <- length(unique(readings_below_0$sample))
# Sample IDs of the offending readings, in decreasing %CO2 order
id_samples_below_0 <- unique(readings_below_0[order(readings_below_0$CO2_per, decreasing = TRUE), "sample"]) # in %CO2 decreasing order
# Single most extreme readings over the whole dataset
extreme_samples <- rbind(merged_samples[order(merged_samples$CO2_per, decreasing = TRUE)[1], ],  # Highest value
                         merged_samples[order(merged_samples$CO2_per, decreasing = FALSE)[1], ]) # Lowest value

# plot only samples with %CO2 <0

plot_below_0_samples <- ggplot(filter(merged_samples, sample %in% id_samples_below_0), aes(x = sample, y = CO2_per, color = plate)) +
  geom_point() +
  # geom_boxplot() +
  geom_hline(yintercept = 0, color = "red") + 
  theme_prism() + 
  labs(x = "Sample", y = "CO2 concentration (%)")
ggplotly(plot_below_0_samples)

Worrying samples:

2.4.2) Position influence

Plate heatmaps
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
heatmaps_samples <- list()

# One %CO2 heatmap per sample dataframe, laid out like the physical plate
# (row A at the top, column names along the top edge).
for (i in seq_along(dfs_s)) { # seq_along() handles an empty dfs_s safely; 1:length() would iterate c(1, 0)
  df <- get(dfs_s[i]) # get the dataframe

  # Create the plots
  heatmaps_samples[[i]] <- ggplot(data = df, aes(x = col, y = row, fill = CO2_per)) +
    geom_tile() + 
    geom_text(aes(label = sample), alpha = 0.3) +
    # scale_fill_gradient(low = "#A64071", high = "#DEBB58") +
    # Diverging fill centred on 0 so negative %CO2 readings stand out
    scale_fill_gradient2(low = "#A64071", mid = "white", high = "#DEBB58", midpoint = 0) +
    theme_minimal() +
    scale_y_discrete(limits = rev(unique(merged_samples$row))) + # so the A row goes at the top, as in the actual plate
    scale_x_discrete(position = "top") + # so the col names go on the top, as in the actual plate
    # NOTE(review): titles use excel_sheets(...)[i], while the palette code
    # earlier skipped the first sheet with [-1] — confirm dfs_s[i] really
    # corresponds to sheet i and not sheet i + 1.
    ggtitle(excel_sheets(sourcefile)[i]) +
    coord_fixed()

  # Wrap up
  rm(df) # drop the per-iteration auxiliary dataframe
}
rm(i) # remove the loop index once, after the loop (the original rm() inside the loop was redundant: `for` reassigns i each pass)

# Arrange all plate heatmaps in a 4-column grid, then print them one by one
grid.arrange(grobs = heatmaps_samples, ncol = 4)

print(heatmaps_samples)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

Position clustering
# Dispersion of normalised absorbance by well position; extreme readings
# (AAdjusted > 1.5 or < 0.6) are labelled, calibration points overlaid.
ggplot(merged_samples, aes(x = well, y = AAdjusted)) +
  geom_point(aes(color = plate)) +
  geom_text(
    data = filter(merged_samples, AAdjusted > 1.5 | AAdjusted < 0.6),
    aes(label = sample, alpha = 0.3)
  ) +
  geom_point(data = mr1_c) +
  labs(y = "Normalised A570")

Position heatmap (all samples)
# Position heatmap of normalised absorbance across all plates combined,
# laid out like the physical plate (row A at the top, columns on top).

ggplot(data = merged_samples, aes(x = col, y = row, fill = AAdjusted)) +
  geom_tile() +
  scale_fill_gradient(low = "#A64071", high = "#DEBB58") + 
  theme_minimal() +
  scale_y_discrete(limits = sort(unique(merged_samples$row), decreasing = TRUE)) + # so the A row goes at the top, as in the actual plate
  scale_x_discrete(position = "top") + # so the col names go on the top, as in the actual plate
  coord_fixed()

Later: test effect of the curve with lmer()

3) Data analysis

3.0) Import dataset

# Load the processed dataset from disk unless it is already in memory.
if (exists("merged_samples")) {
  print("Dataset was already loaded")
} else {
  merged_samples <- readRDS("microresp_processed.rds")
  print("Dataset was loaded from microresp_processed.rds file")
}
## [1] "Dataset was already loaded"

3.1) Sub-setting like crazy: Relevant filters based on analyses performed in 2)

3.1.1) Filter out unwanted reads and group data by relevant variables

# Drop unusable plates, then set the grouping used by downstream analyses.
# Multiple arguments to filter() are combined with AND, exactly like the
# original `&` chain.
working_reads <- merged_samples %>%
  filter(
    # col != "3", # Column next to the calibration curve
    plate != "Plate 1",  # Calibration failed
    plate != "Plate 12", # Calibration failed
    plate != "Test_3"    # Samples were only test
  ) %>%
  group_by(soil, treatment, plant) # group by relevant variables

3.1.2) Average multiple Microresp reads from the same sample

# Collapse technical replicates: average the spectrophotometric readings
# and every derived quantity per sample.
# Columns averaged: A570_0h (absorbance before exposure), A570_6h
# (absorbance after exposure), AAdjusted (normalised absorbance),
# CO2_per (%CO2) and Respiration_rate.

working_samples <- working_reads %>%
  group_by(sample) %>%
  summarize(across(
    c(A570_0h, A570_6h, AAdjusted, CO2_per, Respiration_rate),
    ~ mean(.x, na.rm = TRUE)
  ))

# Metadata is identical across readings of the same sample, so keep the
# first reading per sample and drop the columns averaged above.

working_meta <- working_reads %>%
  group_by(sample) %>%
  slice_head(n = 1) %>% # first row per sample carries the metadata
  select(-c(A570_0h, A570_6h, AAdjusted, CO2_per, Respiration_rate))

# Re-attach metadata to the averaged readings. The join is only meaningful
# if both tables contain exactly the same samples in the same order, so
# verify first; on mismatch, raise a warning (the original used print(),
# which is easy to miss in a rendered document) and leave working_data
# unset so downstream code fails loudly instead of using stale data.

if (identical(working_samples$sample, working_meta$sample)) { # First, check if the samples extracted from both processes match exactly
  working_data <- left_join(working_samples, working_meta, by = "sample")
  working_data <- group_by(working_data, soil, treatment, plant) # group by relevant variables
} else {
  warning("The samples in the reads and metadata don't match! Check for NA values", call. = FALSE)
}

head(working_data)
## # A tibble: 6 × 33
## # Groups:   soil, treatment, plant [6]
##   sample A570_0h A570_6h AAdjusted CO2_per Respiration_rate well  row   col  
##   <chr>    <dbl>   <dbl>     <dbl>   <dbl>            <dbl> <chr> <chr> <fct>
## 1 S100      1.43   1.13       1.10  0.0394           0.0771 B3    B     3    
## 2 S101      1.34   0.998      1.05  0.0884           0.173  C7    C     7    
## 3 S102      1.36   1.03       1.06  0.0742           0.145  C3    C     3    
## 4 S103      1.36   1.01       1.04  0.0972           0.190  D8    D     8    
## 5 S104      1.35   0.977      1.01  0.125            0.244  D3    D     3    
## 6 S105      1.44   1.06       1.03  0.111            0.217  E8    E     8    
## # ℹ 24 more variables: plant <chr>, soil <chr>, treatment <fct>,
## #   replicate <chr>, origin_location <chr>, soil_texture <chr>,
## #   CaCO3_perc <dbl>, NOM_perc <dbl>, pH <dbl>, `P-CaCl2_mgP/kg` <dbl>,
## #   `P-AL_mgP/kg` <dbl>, applied_product <fct>, target_function <chr>,
## #   active_principle_1 <chr>, active_principle_2 <chr>,
## #   active_principle_3 <chr>, active_principle_4 <chr>,
## #   active_principle_5 <chr>, active_principle_6 <chr>, …

3.1.3) Export biologically relevant dataset, with filtered noise and averaged technical replicates

# Export the biologically filtered, replicate-averaged dataset.
write.csv(working_data, "microresp_biofiltered.csv", row.names = FALSE) # CSV: simplest, but loses some structure (e.g. factor columns).
saveRDS(working_data, "microresp_biofiltered.rds") # RDS: exact copy of the R object, but only readable by R.

3.1.4) Other relevant sub-settings

3.2) Further analysis

3.2.1) Respiration rate for all samples

# 5 color palette 
# NOTE(review): despite the name, palette_5col holds SIX colours — confirm
# this matches length(levels(treatment_data$label)) below, since a
# names()<- length mismatch silently leaves NA names or errors.
palette_5col <- c("#EB8900", "#47D7AC", "#FAD847", "#2761C4", "#962C5D", "#CC4389")
# palette_5col <- c("#EB8900", "#2761C4", "#47D7AC", "#CC4389", "#FAD847")

# Treatment palettes based on the 5 color palette
# (treatment_data is defined elsewhere in the full script — not visible here)
palette_treatments <- palette_5col
names(palette_treatments) <- levels(treatment_data$label)

palette_products <- palette_5col
names(palette_products) <- levels(treatment_data$applied_product)

# Boxplots of respiration rate per soil and treatment, one facet per
# plant system, coloured with the treatment palette.
plot_all_resp <- working_data %>%
  ggplot(aes(x = soil, y = Respiration_rate, color = treatment)) +
  # geom_point() +
  geom_boxplot() +
  facet_wrap(~plant) +
  # geom_col(stat = "identity", position = "dodge") +
  scale_color_manual(values = palette_treatments) +
  theme_prism() +
  labs(x = "", y = expression(paste("Respiration rate (", mu, "gCO"[2], "-Cg"^"-1", "h"^"-1", ")")))
plot_all_resp

# Convert to plotly for interactivity
p_plotly <- ggplotly(plot_all_resp)

# Create a dropdown menu to switch between plants
plant_options <- unique(working_data$plant)

# Function to update the plot based on selected plant
# Rebuilds the boxplot for a single plant system and returns it as a
# plotly object.
update_plot <- function(plant_name) {
  p_updated <- ggplot(data = subset(working_data, plant == plant_name), aes(x = soil, y = Respiration_rate, color = treatment)) +
  geom_boxplot() +
  theme_prism() + 
  labs(x = "", y = expression(paste("Respiration rate (", mu, "gCO"[2], "-Cg"^"-1", "h"^"-1", ")"))) 
  
  return(ggplotly(p_updated))
}

# Create a list of buttons for the dropdown menu
# NOTE(review): the `<-` assignments inside args = list(...) execute
# immediately while lapply() builds each button (they only modify a
# function-local copy of p_plotly; the global is untouched). list()
# captures the assigned values, so each button carries precomputed static
# data/title — but this structure likely does not match the named
# (data-update, layout-update) lists plotly's "update" method expects;
# verify the dropdown actually switches plants in the rendered widget.
buttons <- lapply(plant_options, function(plant_name) {
  list(
    method = "update",
    args = list(list(
      p_plotly$x$data <- update_plot(plant_name)$x$data,
      p_plotly$x$layout$title$text <- paste("Respiration Rate by Soil and Treatment (", plant_name, ")")
    )),
    label = plant_name
  )
})

# Add the dropdown menu to the plotly plot
p_plotly <- p_plotly %>% layout(
  updatemenus = list(
    list(
      active = 0,
      buttons = buttons
    )
  )
)

# Display the interactive plot
p_plotly

3.2.2) Respiration rate of a subset

# Plot only a subset: respiration rate for the first plant system, grouped
# by origin location and coloured by applied product.
# NOTE(review): scale_y_continuous(limits = c(0, 1)) DROPS observations
# outside [0, 1] before boxplot statistics are computed (see warning
# below); use coord_cartesian(ylim = c(0, 1)) to zoom without discarding
# data, if that is the intent.
ggplot(filter(working_data, plant == plant_data$label[1]) , aes(x = origin_location, y = Respiration_rate, color = applied_product)) +
  # geom_point() +
  geom_boxplot() +
  # geom_col(stat = "identity", position = "dodge") +
  theme_prism() + 
  scale_y_continuous(limits = c(0,1)) + 
  scale_color_manual(values = palette_products) +
  labs(x = "", y = expression(paste("Respiration rate (", mu, "gCO"[2], "-Cg"^"-1", "h"^"-1", ")"))) 
## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

4) Statistics

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ plotly::filter()     masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(agricolae)

# Three-way ANOVA of respiration rate on plant, treatment and soil,
# including all interactions.
# NOTE(review): the object name `anova` shadows stats::anova(); consider
# renaming (e.g. resp_aov) if stats::anova() is needed later in the script.
anova <- aov(Respiration_rate ~ plant * treatment * soil, data = working_data)
summary(anova)
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## plant                  2  3.520  1.7600  16.875 2.66e-07 ***
## treatment              4  0.130  0.0325   0.312    0.870    
## soil                   3  5.630  1.8768  17.994 5.93e-10 ***
## plant:treatment        8  0.179  0.0224   0.214    0.988    
## plant:soil             6  0.870  0.1450   1.390    0.223    
## treatment:soil        12  0.367  0.0306   0.293    0.990    
## plant:treatment:soil  24  0.599  0.0249   0.239    1.000    
## Residuals            142 14.810  0.1043                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check assumptions (important!)
# a. Normality of residuals
plot(anova, 2) # Q-Q plot
## Warning: not plotting observations with leverage one:
##   84

# b. Homogeneity of variance (Levene's test)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:gtools':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
# Non-significant result (p = 0.384 below) supports homogeneous variances.
leveneTest(Respiration_rate ~ plant * treatment * soil, data = working_data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group  59  1.0596  0.384
##       142
# Example: Tukey for treatment, assuming it was significant in the ANOVA
# NOTE(review): agricolae::HSD.test() expects the fitted model as its first
# argument (e.g. HSD.test(anova, "treatment")), not the data frame —
# verify before un-commenting the line below.
# HSD.test(working_data, "treatment", alpha=0.05)

# length(unique(working_samples$sample)) # How many unique samples are present in the final dataset?
unique(working_data$plant)
## [1] "Bare soil"   "Faba bean"   "Mixed grass"